home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 2002 November
/
SGI Freeware 2002 November - Disc 2.iso
/
dist
/
fw_glimpse.idb
/
usr
/
freeware
/
src
/
glimpse-3.0
/
get_filename.c.z
/
get_filename.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
12KB
|
368 lines
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
#include <sys/types.h>
#include <sys/stat.h>
#include "glimpse.h"
#include <fcntl.h>
#define CHAR unsigned char
/* ----------------------------------------------------------------------
get_filenames()
input: an index vector (the i-th entry is ON if the i-th partition is
to be searched), the partition table in src_index_set[],
and the list of all files in "NAME_LIST".
output: the list of filenames to be searched.
------------------------------------------------------------------------- */
/* Debug stream; only referenced from code compiled under BG_DEBUG. */
#if BG_DEBUG
extern FILE *debug;
#endif /*BG_DEBUG*/
/* get_filenames() reads p_table[i] and p_table[i+1] as the first and one-past-last file number of partition i */
extern int p_table[MAX_PARTITION];
/* Result array: allocated (file_num pointers) in read_filenames(), filled with strdup'd names by get_filenames() */
extern CHAR **GTextfiles;
/* GTextfilenames[i] is set by read_filenames() to point at the start of the i-th line inside bigbuffer */
extern CHAR **GTextfilenames;
/* GFileIndex[n] is written alongside GTextfiles[n] by get_filenames() */
extern int *GFileIndex;
/* Number of valid entries in GTextfiles[]/GFileIndex[]; reset to 0 at the start of get_filenames() */
extern int GNumfiles;
/* Program name, used in the allocation-failure message */
extern CHAR GProgname[];
/* Not referenced in the code visible in this file section */
extern CHAR FileNamePat[];
/* get_filenames() copies names when this equals OFF, otherwise it filters them via memagrep_search() */
extern int MATCHFILE;
/* Read after memagrep_search() as the extent of data in outputbuffer -- presumably set by that call; confirm in agrep */
extern int agrep_outpointer;
/* mask_int[j] is and-ed with an index_vect word to test or clear bit j */
extern int mask_int[32];
/* Non-zero selects the bit-vector (one file per index bit) layout of index_vect in get_filenames() */
extern int OneFilePerBlock;
/* Used here only as a prefix in error messages */
extern char INDEX_DIR[MAX_LINE_LEN];
/* Each entry is allocated (REAL_PARTITION ints) and zeroed by read_filenames() */
extern unsigned int *multi_dest_index_set[MAXNUM_PAT];
/* File count parsed by read_filenames() from the first line of NAME_LIST */
extern int file_num; /* in index/io.c */
/* Size in bytes of NAME_LIST minus the length of its first line; number of bytes read into bigbuffer */
int bigbuffer_size;
/* Filled once by read_filenames(); also used as the "already loaded" flag (NULL means not loaded yet) */
char *bigbuffer = NULL; /* constant buffer to read all filenames in NAME_LIST */
/* Allocated with FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE bytes; passed to memagrep_search() as its output area */
char *outputbuffer = NULL; /* keeps changing: used for -F search via memagrep */
/* Only REAL_PARTITION is used in the code visible in this file section */
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;
read_filenames()
{
struct stat st;
unsigned char buffer[MAX_NAME_SIZE];
char *currptr;
int i;
/* one time processing: assumes during one run of glimpse, the index remains constant! */
if (bigbuffer == NULL) {
FILE *fp = fopen(NAME_LIST, "r");
if (fp == NULL) {
fprintf(stderr, "Can't open for reading: %s/%s\n", INDEX_DIR, NAME_LIST);
exit(2);
}
if (-1 == stat(NAME_LIST, &st)) {
fclose(fp);
fprintf(stderr, "Can't stat: %s/%s\n", INDEX_DIR, NAME_LIST);
exit(2);
}
fgets(buffer, MAX_NAME_SIZE, fp);
bigbuffer_size = st.st_size - strlen(buffer);
sscanf(buffer, "%d", &file_num);
if ((file_num < 0) || (file_num > MaxNum24bPartition)) {
fclose(fp);
fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST);
exit(2);
}
initialize_data_structures(file_num);
for (i=0; i<MAXNUM_PAT; i++) {
multi_dest_index_set[i] = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION);
memset(multi_dest_index_set[i], '\0', sizeof(int) * REAL_PARTITION);
}
bigbuffer = (char *)my_malloc(bigbuffer_size + MAX_PAT + 2); /* The whole file + place to store -F's pattern */
if (bigbuffer != NULL) outputbuffer = (char *)my_malloc(FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE); /* Space for max# files per partition */
if (outputbuffer != NULL) GTextfiles = (CHAR **) my_malloc(sizeof(CHAR *) * file_num);
if (GTextfiles != NULL) GTextfilenames = (CHAR **) my_malloc(sizeof(CHAR *) * file_num);
if (GTextfilenames != NULL) GFileIndex = (int *)my_malloc(sizeof(int) * file_num);
if (bigbuffer == NULL || outputbuffer == NULL || GTextfiles == NULL || GTextfilenames == NULL || GFileIndex == NULL) {
fclose(fp);
fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__);
exit(2); /* No point freeing memory */
}
if (bigbuffer_size != fread(bigbuffer, 1, bigbuffer_size, fp)) {/* read in whole file in CONTIGUOUS memory */
fclose(fp);
fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST);
exit(2); /* No point freeing memory */
}
memset(bigbuffer+bigbuffer_size, '\n', MAX_PAT + 2);
for (i=0, currptr = bigbuffer; i<file_num && currptr < bigbuffer + bigbuffer_size; i++, currptr ++) {
GTextfilenames[i] = (unsigned char *)currptr;
while (*currptr != '\n') currptr ++;
}
}
return 0;
}
/*
 * get_filenames()
 *
 * Builds the list of file names to be searched: GTextfiles[0..GNumfiles-1]
 * receives strdup'd copies of names taken from bigbuffer (one name per
 * '\n'-terminated line) and GFileIndex[] receives an index for each of them.
 * GNumfiles is reset to 0 on entry and never allowed to exceed file_num.
 * bigbuffer must already be non-NULL and '\n'-padded -- presumably
 * read_filenames() has been called first; confirm at the call sites.
 *
 * Parameters:
 *   index_vect - selection vector.  If OneFilePerBlock is non-zero it is
 *                treated as a bit vector (8*sizeof(int) bits per element,
 *                bit j tested with mask_int[j]) with one bit per file;
 *                otherwise index_vect[i] > 0 selects partition i, whose
 *                files are numbered p_table[i] .. p_table[i+1]-1.
 *                When MATCHFILE != OFF, entries/bits whose names do not
 *                match are cleared in place.
 *   argc, argv - passed to memagrep_init() (only when MATCHFILE != OFF).
 *   dummylen   - incoming value is ignored; overwritten with the return
 *                value of memagrep_init().
 *   dummypat   - pattern buffer handed to memagrep_init()/memagrep_search().
 *   file_num   - number of files; note that this parameter hides the
 *                file-scope extern of the same name inside this function.
 *
 * Return value: none is produced (every exit is a bare "return;") although
 * the function is implicitly declared int.
 * NOTE(review): callers must not rely on the result -- confirm.
 *
 * If strdup() fails, a message is printed and the function returns with
 * whatever names were collected so far.
 */
get_filenames(index_vect, argc, argv, dummylen, dummypat, file_num)
int *index_vect;
int argc; /* the arguments to agrep for -F */
char *argv[];
int dummylen;
CHAR dummypat[];
int file_num;
{
int i=0,j, ret;
int start, end, k, prevk;
/* filesseen = number of names (lines of bigbuffer) already walked past */
int filesseen;
/* beginptr/endptr delimit the name(s) currently being examined inside bigbuffer */
char *beginptr, *endptr;
#if BG_DEBUG
fprintf(debug, "get_filenames(): the following partitions are ON\n");
for(i=0; i<((OneFilePerBlock > 0) ? round(file_num, 8*sizeof(int)) : MAX_PARTITION); i++)
if(index_vect[i]) fprintf(debug, "i=%d,%x\n", i, index_vect[i]);
#endif /*BG_DEBUG*/
GNumfiles = 0;
filesseen = 0;
endptr = beginptr = bigbuffer;
if(MATCHFILE == OFF) { /* just copy the filenames */
if (OneFilePerBlock) {
/* index_vect is a bit vector: visit every set bit; bit (i, j) stands for file number i*8*sizeof(int)+j */
for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
if (index_vect[i] == 0) continue;
for (j=0; j<8*sizeof(int); j++) {
if (!(index_vect[i] & mask_int[j])) continue;
start = i*8*sizeof(int) + j;
end = start + 1;
#if BG_DEBUG
fprintf(debug, "start=%d, end=%d\n", start, end);
#endif /*BG_DEBUG*/
/*
* skip over so many filenames and get the filenames to copy.
* NOTE: successive "start"s ALWAYS increase.
*/
while(filesseen < start) {
while(*beginptr != '\n') beginptr ++;
beginptr ++; /* skip over '\n' */
filesseen ++;
}
endptr = beginptr;
while (filesseen < end) {
while(*endptr != '\n') endptr ++;
/*
* NOTE(review): beginptr is at the first byte of this name here, so
* endptr == beginptr + 1 holds for a ONE-character name, while an empty
* name gives endptr == beginptr.  The corresponding test in the MATCHFILE
* branch runs after beginptr was backed up onto the preceding '\n'.
* Confirm which behaviour is intended.
*/
if (endptr == beginptr + 1) goto end_of_loop1; /* null name of non-existent file */
/* temporarily NUL-terminate the name inside bigbuffer so it can be strdup'd; the '\n' is restored afterwards */
*endptr = '\0';
/* return with all the names you COULD get */
if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
*endptr = '\n';
fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
return;
}
GFileIndex[GNumfiles] = i*8*sizeof(int) + j;
*endptr = '\n';
/* the result arrays were sized for file_num entries: stop once they are full */
if (++GNumfiles >= file_num) goto end_files;
end_of_loop1:
beginptr = endptr = endptr + 1; /* skip over '\n' */
filesseen ++;
}
}
}
} /* one file per block */
else {
/* Just the outer for-loop and initial begin/end values are different: rest is same */
/*
* NOTE(review): for i == MAX_PARTITION-1 this reads p_table[MAX_PARTITION],
* one past the size given in this file's extern declaration -- confirm the
* definition of p_table is large enough.
*/
for (i=0; i<MAX_PARTITION; i++) {
if(index_vect[i] > 0) {
start = p_table[i];
end = p_table[i+1];
if (start >= end) continue;
#if BG_DEBUG
fprintf(debug, "start=%d, end=%d\n", start, end);
#endif /*BG_DEBUG*/
/*
* skip over so many filenames and get the filenames to copy.
* NOTE: successive "start"s ALWAYS increase.
*/
while(filesseen < start) {
while(*beginptr != '\n') beginptr ++;
beginptr ++; /* skip over '\n' */
filesseen ++;
}
endptr = beginptr;
/* copy every name of the partition: files start .. end-1 */
while (filesseen < end) {
while(*endptr != '\n') endptr ++;
/* NOTE(review): same one-character-name concern as in the OneFilePerBlock copy loop above */
if (endptr == beginptr + 1) goto end_of_loop2; /* null name of non-existent file */
*endptr = '\0';
/* return with all the names you COULD get */
if ((GTextfiles[GNumfiles] = (CHAR *)strdup(beginptr)) == NULL) {
*endptr = '\n';
fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
return;
}
/* here the recorded index is the running file number */
GFileIndex[GNumfiles] = filesseen;
*endptr = '\n';
if (++GNumfiles >= file_num) goto end_files;
end_of_loop2:
beginptr = endptr = endptr + 1; /* skip over '\n' */
filesseen ++;
}
}
}
}
}
else { /* search and copy matched filenames */
extern int REGEX, FASTREGEX; /* agrep global which tells us whether the pattern is a regular expression or not */
int myREGEX, myFASTREGEX;
/* a non-positive pattern length from memagrep_init() means nothing can be matched: leave with GNumfiles == 0 */
if ((dummylen = memagrep_init(argc, argv, MAX_PAT, dummypat)) <= 0) goto end_files;
/*
* The result of this first search is never examined (ret is overwritten
* before it is read); REGEX/FASTREGEX are sampled right after it.
* Presumably the call is needed so that agrep sets those flags -- confirm.
*/
ret = memagrep_search(dummylen, dummypat, dummylen*2, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
myREGEX = REGEX; myFASTREGEX = FASTREGEX;
if (OneFilePerBlock) {
for (i=0; i<round(file_num, 8*sizeof(int)); i++) {
if (index_vect[i] == 0) continue;
for (j=0; j<8*sizeof(int); j++) {
if (!(index_vect[i] & mask_int[j])) continue;
start = i*8*sizeof(int) + j;
end = start + 1;
#if BG_DEBUG
fprintf(debug, "start=%d, end=%d\n", start, end);
#endif /*BG_DEBUG*/
/*
* skip over so many filenames and get the region to search =
* beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
*/
while(filesseen < start) {
while(*beginptr != '\n') beginptr ++;
beginptr ++; /* skip over '\n' */
filesseen ++;
}
/*
* NOTE(review): if beginptr still equals bigbuffer here (file number 0
* selected first), this moves it one byte before the start of the buffer,
* and the regex path below passes that pointer to memagrep_search --
* confirm this is safe.
*/
beginptr --; /* I need '\n' for memory search */
endptr = beginptr+1;
while (filesseen < end) {
while(*endptr != '\n') endptr ++;
endptr ++; /* skip over '\n' */
filesseen ++;
}
endptr --; /* I need '\n' for memory search */
/* beginptr is on the '\n' before the name and endptr on the '\n' after it, so adjacency means an empty name */
if (endptr == beginptr + 1) goto end_of_loop3; /* null name of non-existent file */
#if BG_DEBUG
*endptr = '\0';
fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
*endptr = '\n';
#endif /*BG_DEBUG*/
/* if file in the partition matches then copy it */
/* regex patterns get the region including its leading '\n'; other patterns get the region starting just after it */
if (myREGEX || myFASTREGEX) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
if (ret > 0) {
#if BG_DEBUG
{
char c = outputbuffer[agrep_outpointer + 1];
outputbuffer[agrep_outpointer + 1] = '\0';
fprintf(debug, "OUTPUTBUFFER=%s\n", outputbuffer);
outputbuffer[agrep_outpointer + 1] = c;
}
#endif /*BG_DEBUG*/
/* split outputbuffer into '\n'-terminated lines; prevk marks the start of the current line, k scans ahead */
k = prevk = 0;
while(k+1<agrep_outpointer) { /* name of a file cannot have '\n' in it */
k++;
if (outputbuffer[k] == '\n') {
outputbuffer[k] = '\0';
/* return with all the names you COULD get */
if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
outputbuffer[k] = '\n';
fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
return;
}
outputbuffer[k] = '\n';
GFileIndex[GNumfiles] = i*8*sizeof(int)+j;
if (++GNumfiles >= file_num) goto end_files;
k = prevk = k+1;
}
}
}
else {
index_vect[i] &= ~mask_int[j]; /* remove it from the list: used if ByteLevelIndex */
}
end_of_loop3:
/* step past the trailing '\n'; filesseen was already advanced while locating endptr */
beginptr = endptr = endptr + 1;
}
}
} /* one file per block */
else {
/* Just the outer for-loop and initial begin/end values are different: rest is same */
/* NOTE(review): p_table[i+1] reaches index MAX_PARTITION on the last iteration -- see the copy branch above */
for (i=0; i<MAX_PARTITION; i++) {
if(index_vect[i] > 0) {
start = p_table[i];
end = p_table[i+1];
if (start >= end) continue;
#if BG_DEBUG
fprintf(debug, "start=%d, end=%d\n", start, end);
#endif /*BG_DEBUG*/
/*
* skip over so many filenames and get the region to search =
* beginptr to endptr: NOTE: successive "start"s ALWAYS increase.
*/
while(filesseen < start) {
while(*beginptr != '\n') beginptr ++;
beginptr ++; /* skip over '\n' */
filesseen ++;
}
/* NOTE(review): same before-start-of-buffer concern as in the OneFilePerBlock search loop when beginptr == bigbuffer */
beginptr --; /* I need '\n' for memory search */
endptr = beginptr+1;
/* extend the region over all names of the partition; they are searched in one call */
while (filesseen < end) {
while(*endptr != '\n') endptr ++;
endptr ++; /* skip over '\n' */
filesseen ++;
}
endptr --; /* I need '\n' for memory search */
if (endptr == beginptr + 1) goto end_of_loop4; /* null name of non-existent file */
#if BG_DEBUG
*endptr = '\0';
fprintf(debug, "From %d searching:\n%s\n", filesseen, beginptr+1);
*endptr = '\n';
#endif /*BG_DEBUG*/
/* if file in the partition matches then copy it */
if (myREGEX || myFASTREGEX) ret = memagrep_search(dummylen, dummypat, endptr-beginptr + 1, beginptr, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
else ret = memagrep_search(dummylen, dummypat, endptr-beginptr/* + 1*/, beginptr+1, FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE, outputbuffer);
if (ret > 0) {
/* split outputbuffer into '\n'-terminated lines, one result entry per line */
k = prevk = 0;
while(k+1<agrep_outpointer) { /* name of a file cannot have '\n' in it */
k++;
if (outputbuffer[k] == '\n') {
outputbuffer[k] = '\0';
/* return with all the names you COULD get */
if ((GTextfiles[GNumfiles] = (CHAR *)strdup(outputbuffer+prevk)) == NULL) {
outputbuffer[k] = '\n';
fprintf(stderr, "Out of memory at: %s:%d\n", __FILE__, __LINE__);
return;
}
outputbuffer[k] = '\n';
GFileIndex[GNumfiles] = filesseen - 1; /* not sure here which one but this is never used so ok to fill junk */
if (++GNumfiles >= file_num) goto end_files;
k = prevk = k+1;
}
}
}
else {
index_vect[i] = 0; /* mask it off */
}
end_of_loop4:
beginptr = endptr = endptr + 1;
}
}
}
}
/* common exit for "result arrays full" and "no usable pattern" */
end_files:
#if BG_DEBUG
fprintf(debug, "The following %d filenames are ON\n", GNumfiles);
for (i=0; i<GNumfiles; i++)
fprintf(debug, "\t%s\n", GTextfiles[i]);
#endif /*BG_DEBUG*/
return;
}